In [1]:
import pandas as pd
import numpy as np 
import matplotlib.pyplot as plt
# import seaborn as sns 
import datetime
from plotly.offline import download_plotlyjs, init_notebook_mode, plot, iplot
from plotly.graph_objs import Scatter, Figure, Layout
import plotly
import plotly.graph_objs as go
import plotly.express as px
from IPython.display import Markdown as md
init_notebook_mode(connected=False)
import io
import requests
import re

COVID-19 in Italy. Visuals


(alternatively, see results and code together here)

 


Data source: this GitHub page

Authors and sources mentioned: Editore/Autore del dataset: Dipartimento della Protezione Civile. Categoria ISO 19115: Salute. Dati forniti dal Ministero della Salute.

Regional data files (Dati per Regione):
  • Struttura file giornaliero: dpc-covid19-ita-regioni-yyyymmdd.csv (dpc-covid19-ita-regioni-20200224.csv)
  • File complessivo: dpc-covid19-ita-regioni.csv
  • File ultimi dati (latest): dpc-covid19-ita-regioni-latest.csv

 

In [2]:
# Fetch the Italian Wikipedia page on regions and parse every HTML table on it.
URL = 'https://it.wikipedia.org/wiki/Regione_(Italia)'
res = requests.get(URL)
# Stop here on an HTTP error instead of trying to parse an error page.
res.raise_for_status()
# Pass a file-like object: newer pandas deprecates literal HTML strings in read_html.
tables = pd.read_html(io.StringIO(res.text))
# NOTE(review): the table position is hard-coded and depends on the page layout
# at the time of writing; the next cell reads the 'Regione' and
# 'Popolazione (ab.)' columns from it -- confirm the index after page edits.
dt = tables[13]
In [3]:
def dewhite(x):
    """Return the digits contained in ``x`` joined into a single string.

    Every run of decimal digits is extracted and the runs are concatenated,
    so any separators between digit groups are removed.

    Parameters
    ----------
    x : str
        Text to extract digits from.

    Returns
    -------
    str
        The concatenated digits; an empty string when ``x`` has none.
    """
    # Raw string so that ``\d`` is not treated as an invalid string escape.
    return ''.join(re.findall(r'\d+', x))

# Keep only the region name and population columns, under English names.
dt2 = dt[['Regione', 'Popolazione (ab.)']].copy()
dt2.columns = ['Region', 'Pop']

# Keep only the digits of each population string and cast to int
# (presumably the scraped values contain digit-group separators -- verify).
# The pattern is a raw string so that ``\d`` is not an invalid string escape.
dt2['Pop'] = dt2['Pop'].apply(lambda x: ''.join(re.findall(r'\d+', x))).astype(int)
In [4]:
# Download the cumulative per-region CSV from the pcm-dpc/COVID-19 repository.
resp = requests.get("https://raw.githubusercontent.com/pcm-dpc/COVID-19/master/dati-regioni/dpc-covid19-ita-regioni.csv")
# Stop here on an HTTP error rather than parsing an error body as CSV.
resp.raise_for_status()
s = resp.content
dat = pd.read_csv(io.StringIO(s.decode('utf-8')))
# 'data' holds timestamp strings; their maximum is shown as the data date.
cdate = dat['data'].max()

md("Currently data as of date: {}".format(cdate))
Out[4]:

Currently data as of date: 2020-10-16T17:00:00


 

What's in the original dataframe?

In [9]:
# Render the raw frame's column labels as one Markdown line.
md("All column names: {}".format(list(dat.columns)))
Out[9]:

All column names: ['data', 'stato', 'codice_regione', 'denominazione_regione', 'lat', 'long', 'ricoverati_con_sintomi', 'terapia_intensiva', 'totale_ospedalizzati', 'isolamento_domiciliare', 'totale_positivi', 'variazione_totale_positivi', 'nuovi_positivi', 'dimessi_guariti', 'deceduti', 'casi_da_sospetto_diagnostico', 'casi_da_screening', 'totale_casi', 'tamponi', 'casi_testati', 'note']

In [10]:
# Italian source column -> English name used in the rest of the notebook.
# Renaming by name (instead of assigning a positional list to df.columns)
# keeps working when the upstream CSV gains or reorders columns; columns that
# are not listed here ('casi_testati', 'note', any new ones) keep their names.
COLUMN_NAMES = {
    'data': 'Date',
    'denominazione_regione': 'Region',
    'lat': 'Lat',
    'long': 'Long',
    'ricoverati_con_sintomi': 'HospWithSymptoms',
    'terapia_intensiva': 'IC',
    'totale_ospedalizzati': 'HospTotal',
    'isolamento_domiciliare': 'AtHome',
    'totale_positivi': 'CurrentlyPositive',
    'variazione_totale_positivi': 'VariationOfPositives',
    'nuovi_positivi': 'NewPositives',
    'dimessi_guariti': 'Recovered',
    'deceduti': 'Deaths',
    'casi_da_sospetto_diagnostico': 'Diagnostico',
    'casi_da_screening': 'Screening',
    'totale_casi': 'TotalCases',
    'tamponi': 'NoOfTests',
}

df = dat.drop(['stato', 'codice_regione'], axis=1).rename(columns=COLUMN_NAMES)

# Attach each region's population. This is an inner join, so rows whose
# region name has no exact match in dt2 are dropped.
df = pd.merge(df, dt2, on='Region')

# Reduce the timestamp to a calendar date, then use it as a datetime index
# while also keeping 'Date' as a regular column.
df['Date'] = pd.to_datetime(df['Date']).dt.date
df = df.set_index(df["Date"])
df.index = pd.to_datetime(df.index)

# NOTE(review): negative daily counts are turned into positive ones here;
# presumably they are upstream corrections -- confirm this is intended.
df['NewPositives'] = np.abs(df['NewPositives'])

dat.tail(5)
Out[10]:
data stato codice_regione denominazione_regione lat long ricoverati_con_sintomi terapia_intensiva totale_ospedalizzati isolamento_domiciliare ... variazione_totale_positivi nuovi_positivi dimessi_guariti deceduti casi_da_sospetto_diagnostico casi_da_screening totale_casi tamponi casi_testati note
4951 2020-10-16T17:00:00 ITA 19 Sicilia 38.115697 13.362357 471 58 529 5405 ... 447 578 4975 360 7238.0 4031.0 11269 583340 416980.0 NaN
4952 2020-10-16T17:00:00 ITA 9 Toscana 43.769231 11.255889 308 51 359 8156 ... 609 755 11315 1187 16194.0 4823.0 21017 890317 598910.0 NaN
4953 2020-10-16T17:00:00 ITA 10 Umbria 43.106758 12.388247 75 15 90 1871 ... 144 198 2158 91 1940.0 2270.0 4210 246365 145535.0 NaN
4954 2020-10-16T17:00:00 ITA 2 Valle d'Aosta 45.737503 7.320149 13 4 17 365 ... 22 27 1132 146 1499.0 161.0 1660 32618 22334.0 NaN
4955 2020-10-16T17:00:00 ITA 5 Veneto 45.434905 12.338452 299 40 339 7799 ... 530 704 23895 2244 22660.0 11617.0 34277 2121494 828067.0 NaN

5 rows × 21 columns


 

Variable names translated into English, with explanations

  • HospWithSymptoms : Currently hospitalized patients with symptoms
  • IC : Intensive care
  • HospTotal: Total number of currently hospitalized patients
  • AtHome : Currently at home confinement
  • CurrentlyPositive : Total amount of current positive cases (Hospitalised patients + Home confinement)
  • NewPositives : Daily new positive cases (total amount of positive cases - total amount of positive cases of the previous day)
  • TotalCases : Total amount of positive cases
  • NoOfTests : Tests performed
In [11]:
# Preview the last five rows of the merged, renamed frame.
df.tail(5)
Out[11]:
Date Region Lat Long HospWithSymptoms IC HospTotal AtHome CurrentlyPositive VariationOfPositives NewPositives Recovered Deaths Diagnostico Screening TotalCases NoOfTests casi_testati note Pop
Date
2020-10-12 2020-10-12 Veneto 45.434905 12.338452 244 29 273 6016 6289 201 328 23323 2219 22250.0 9581.0 31831 2069955 808554.0 NaN 4905854
2020-10-13 2020-10-13 Veneto 45.434905 12.338452 272 33 305 6350 6655 366 485 23435 2226 22334.0 9982.0 32316 2074007 810366.0 NaN 4905854
2020-10-14 2020-10-14 Veneto 45.434905 12.338452 271 35 306 6876 7182 527 657 23565 2226 22436.0 10537.0 32973 2095102 818552.0 NaN 4905854
2020-10-15 2020-10-15 Veneto 45.434905 12.338452 279 40 319 7289 7608 426 600 23728 2237 22546.0 11027.0 33573 2114126 825788.0 NaN 4905854
2020-10-16 2020-10-16 Veneto 45.434905 12.338452 299 40 339 7799 8138 530 704 23895 2244 22660.0 11617.0 34277 2121494 828067.0 NaN 4905854

 

(Click a region in the legend to hide or show it; double-click a region to show only that one.)

In [12]:
# Work on an independent copy: later cells add derived columns to df2, and
# with a plain alias (df2 = df) those writes would also modify df.
df2 = df.copy()

# One line per region of the daily new positives, on a linear y axis.
fig = px.line(df2, x=df2.index, y="NewPositives", color="Region", hover_name="Region",
        render_mode="svg", log_y=False)
fig.update_layout(title="Daily new cases, absolute numbers")
fig.show()
In [13]:
# 7-day moving average of daily new positives, computed separately per region.
# A plain rolling() over the stacked frame would let each region's first six
# windows include rows that belong to a different region.
df2['MovAv7'] = (
    df2.groupby('Region')['NewPositives']
       .transform(lambda s: s.rolling(window=7).mean())
)

# Evaluate the date filter once and reuse it for both the rows and the x values.
after_march_1 = df2.index > '2020-3-1'
fig = px.line(df2[after_march_1], x=df2.index[after_march_1], y="MovAv7", color="Region", hover_name="Region",
        render_mode="svg", log_y=False)
fig.update_layout(title="1-week MA of daily new cases")
fig.show()
In [14]:
# Daily new positives per one million inhabitants of the region.
df2['NewPos_pc'] = df2['NewPositives']/df2['Pop']*1000_000

# Smooth with a 7-day moving average computed separately per region, so that
# no window mixes rows from two different regions.
df2['NewPos_pc'] = (
    df2.groupby('Region')['NewPos_pc']
       .transform(lambda s: s.rolling(window=7).mean())
)

# Evaluate the date filter once and reuse it for both the rows and the x values.
after_march_1 = df2.index > '2020-3-1'
fig = px.line(df2[after_march_1], x=df2.index[after_march_1], y="NewPos_pc", color="Region",
              hover_name="Region", log_y=False)
fig.update_layout(title="1-week MA of daily new cases, per million")
fig.show()
In [15]:
# Intensive-care count divided by regional population, scaled to one million.
df2['IC_pc'] = df2['IC'].div(df2['Pop']).mul(1_000_000)

# One line per region over the 'Date' column; the title is set on the same figure.
fig = px.line(
    df2,
    x="Date",
    y="IC_pc",
    color="Region",
    hover_name="Region",
    render_mode="svg",
    log_y=False,
).update_layout(title="Current intensive care patients, per million")
fig.show()
In [16]:
# Total hospitalized divided by regional population, scaled to one million.
df2['Hosp_pc'] = df2['HospTotal'].div(df2['Pop']).mul(1_000_000)

# One line per region over the 'Date' column; the title is set on the same figure.
fig = px.line(
    df2,
    x="Date",
    y="Hosp_pc",
    color="Region",
    hover_name="Region",
    render_mode="svg",
    log_y=False,
).update_layout(title="Current hospitalized, per million")
fig.show()
In [17]:
# Separate copy so the extra column does not end up in df2.
df3 = df2.copy()

# Daily deaths: cumulative count minus the previous row's count within the same region.
df3['NewDeaths'] = df3['Deaths'].sub(df3.groupby(['Region'])['Deaths'].shift())

# Bars coloured by region, with the date shown as the hover title.
fig = px.bar(
    df3,
    x=df3['Date'],
    y="NewDeaths",
    color="Region",
    hover_name="Date",
).update_layout(title="Daily number of new deaths, absolute numbers")
fig.show()
In [18]:
# Daily tests: cumulative test count minus the previous row's count within the same region.
df2['NewNoOfTests'] = df2['NoOfTests'] - df2.groupby(['Region'])['NoOfTests'].shift()

# Share of new positives among the day's tests, in percent. Days whose test
# count is zero or negative have no meaningful rate, so the denominator is
# masked to NaN there instead of producing inf or negative percentages
# (which a log-scaled axis cannot show anyway).
valid_daily_tests = df2['NewNoOfTests'].where(df2['NewNoOfTests'] > 0)
df2['New_per_test'] = df2['NewPositives']/valid_daily_tests*100

# Filter the regions once and reuse the result for both the data and the x values.
northern = df2[df2['Region'].isin(['Lombardia','Veneto','Emilia-Romagna','Piemonte','Liguria'])]
fig = px.line(northern, x=northern.index, y="New_per_test", color="Region", hover_name="Region",
        render_mode="svg", log_y=True, line_shape='spline')
fig.update_layout(title="New positive cases in daily tests in Northern regions, %")
fig.show()
In [19]:
# Cumulative deaths divided by regional population, scaled to one million.
df2['Deaths_per_mio'] = df2['Deaths'].div(df2['Pop']).mul(1_000_000)

# One spline-smoothed line per region; the title is set on the same figure.
fig = px.line(
    df2,
    x="Date",
    y="Deaths_per_mio",
    color="Region",
    hover_name="Region",
    render_mode="svg",
    line_shape='spline',
).update_layout(title="Cumulative number of deaths, per million")
fig.show()
In [20]:
# Daily variation of currently-positive cases per one million inhabitants.
df2['Change_per_mio'] = df2['VariationOfPositives']/df2['Pop']*1000_000
# Smooth with a 7-day moving average computed separately per region, so that
# no window mixes rows from two different regions.
df2['Change_per_mio'] = (
    df2.groupby('Region')['Change_per_mio']
       .transform(lambda s: s.rolling(window=7).mean())
)

# Plot everything after 1 March 2020 except Valle d'Aosta.
shown = df2[(df2.index>'2020-3-1') & (df2['Region']!="""Valle d'Aosta""")]
fig = px.line(shown, x='Date', y="Change_per_mio", color="Region", hover_name="Date")
# The plotted series is the daily change, so the title says so.
fig.update_layout(title="1-week MA of daily change in current positive cases, per million (excl. Valle d'Aosta)")
fig.show()

 

Italy as a whole

Data from all regions aggregated

In [21]:
df2 = df

# National totals per day. The 'Date' column is removed from the data and used
# only as the grouping key, and numeric_only=True keeps text columns such as
# 'Region' out of the sum (summing them concatenates strings or raises,
# depending on the pandas version).
df_sum = (
    df2.drop(columns=['Lat', 'Long', 'Date'])
       .groupby(df2['Date'])
       .sum(numeric_only=True)
       .reset_index()
)

# Long format: one row per (Date, variable) for the three plotted series.
df_sum2 = pd.melt(df_sum, id_vars=['Date'], value_vars=['NewPositives','IC','HospTotal'])

fig = px.line(df_sum2, x="Date", y="value", color='variable', hover_name="value", render_mode="svg", log_y=True,
              line_shape='spline')
fig.update_layout(title="Number of daily new positive cases, current IC patients and total hospitalized")
fig.show()
In [ ]: